import requests
import re

# One shared Session so that any cookies received here are sent again by the
# requests made later in the script.
sessions = requests.Session()
url_index = 'https://blog.csdn.net/nav/python'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36',
    'Referer': 'https://blog.csdn.net/nav/python',
}
# Visit the index page first (presumably to obtain session cookies before
# calling the search API -- confirm whether the API actually requires this).
# The timeout keeps a stalled connection from blocking the script forever.
response = sessions.get(url=url_index, headers=headers, timeout=10)


# Asynchronous (Ajax) search requests; `page` is the result page number.
# The headers are identical for every request, so build them once.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36',
    'Referer': 'https://so.csdn.net/so/search?q=python&t=&u=',
}
# Titles come back with <em>...</em> highlight markup.  Remove the tags (and
# '*') as whole tokens; a character class such as [</em>*] would also delete
# every 'e' and 'm' letter from the title.
highlight_pattern = re.compile(r'</?em>|\*')
# Characters that cannot appear in a file name (path separators, Windows
# reserved characters, control whitespace).
unsafe_filename_chars = re.compile(r'[\\/:*?"<>|\r\n\t]')
# Seconds to wait on any single request before giving up.
request_timeout = 10

for page in range(1, 51):
    url_baidu = 'https://so.csdn.net/api/v3/search?q=python&t=all&p={}&s=0&tm=0&lv=-1&ft=0&l=&u=&ct=-1&pnt=-1&ry=-1&ss=-1&dct=-1&platform=pc'.format(page)

    try:
        response = sessions.post(url=url_baidu, headers=headers, timeout=request_timeout)
        # Parse the JSON body once per page instead of once per field access.
        payload = response.json()
    except (requests.RequestException, ValueError) as error:
        # ValueError covers a non-JSON body (e.g. an error or block page).
        print('skipping page %s: %s' % (page, error))
        continue

    results = payload.get('result_vos') if isinstance(payload, dict) else None

    # Iterate over the url and title of every article the page actually
    # returned; a page may hold fewer entries than expected.
    for result in results or []:
        url_article = result.get('url')
        raw_title = result.get('title')
        if not url_article or raw_title is None:
            continue

        title = highlight_pattern.sub('', raw_title)

        try:
            # NOTE(review): the article page is fetched with POST, as in the
            # original script; a plain GET may be more appropriate -- confirm.
            response_1 = sessions.post(url=url_article, headers=headers, timeout=request_timeout)
        except requests.RequestException as error:
            # One unreachable article should not abort the whole crawl.
            print('skipping article %s: %s' % (url_article, error))
            continue
        s = response_1.text.encode()

        # Save the article's HTML under a file-system-safe name.
        safe_title = unsafe_filename_chars.sub('_', title).strip()
        filepath = 'blog_%s.html' % safe_title
        with open(filepath, 'wb') as f:
            f.write(s)
        print(title)